From a12da5f6e948f86a07959df51fcd77bde2973a25 Mon Sep 17 00:00:00 2001 From: Debarshi Ray Date: Sun, 29 Apr 2018 23:57:42 +0200 Subject: [PATCH] build: Add scaffolding for SSE3 It's currently unused, but it is useful to have around when comparing different vectorization strategies. https://bugzilla.gnome.org/show_bug.cgi?id=795686 --- configure.ac | 41 ++++++++++++++++++++++++++++++++--------- meson.build | 6 ++++++ meson_options.txt | 1 + 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index 1dfdccd..8aa37c5 100644 --- a/configure.ac +++ b/configure.ac @@ -324,6 +324,10 @@ AC_ARG_ENABLE(sse2, [ --enable-sse2 enable SSE2 support (default=auto)],, enable_sse2=$enable_sse) +AC_ARG_ENABLE(sse3, + [ --enable-sse3 enable SSE3 support (default=auto)],, + enable_sse3=$enable_sse2) + AC_ARG_ENABLE(sse4_1, [ --enable-sse4_1 enable SSE4_1 support (default=auto)],, enable_sse4_1=$enable_sse) @@ -388,22 +392,40 @@ if test "x$enable_mmx" = xyes; then AC_MSG_WARN([The assembler does not support the SSE2 command set.]) ) - if test "x$enable_sse4_1" = xyes; then - BABL_DETECT_CFLAGS(sse4_1_flag, '-msse4.1') - SSE4_1_EXTRA_CFLAGS="$SSE_EXTRA_CFLAGS $sse4_1_flag" + if test "x$enable_sse3" = xyes; then + BABL_DETECT_CFLAGS(sse3_flag, '-msse3') + SSE3_EXTRA_CFLAGS="$SSE2_EXTRA_CFLAGS $sse3_flag" - AC_MSG_CHECKING(whether we can compile SSE4_1 code) + AC_MSG_CHECKING(whether we can compile SSE3 code) - CFLAGS="$CFLAGS $sse4_1_flag" + CFLAGS="$CFLAGS $sse3_flag" - AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("pmovzxbd %xmm0,%xmm1");])], - AC_DEFINE(USE_SSE4_1, 1, [Define to 1 if SSE4_1 assembly is available.]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("addsubpd %xmm0,%xmm1");])], + AC_DEFINE(USE_SSE3, 1, [Define to 1 if SSE3 assembly is available.]) AC_MSG_RESULT(yes) , - enable_sse4_1=no + enable_sse3=no AC_MSG_RESULT(no) - AC_MSG_WARN([The assembler does not support the SSE4_1 command set.]) + AC_MSG_WARN([The assembler does not support the 
SSE3 command set.]) ) + + if test "x$enable_sse4_1" = xyes; then + BABL_DETECT_CFLAGS(sse4_1_flag, '-msse4.1') + SSE4_1_EXTRA_CFLAGS="$SSE_EXTRA_CFLAGS $sse4_1_flag" + + AC_MSG_CHECKING(whether we can compile SSE4_1 code) + + CFLAGS="$CFLAGS $sse4_1_flag" + + AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("pmovzxbd %xmm0,%xmm1");])], + AC_DEFINE(USE_SSE4_1, 1, [Define to 1 if SSE4_1 assembly is available.]) + AC_MSG_RESULT(yes) + , + enable_sse4_1=no + AC_MSG_RESULT(no) + AC_MSG_WARN([The assembler does not support the SSE4_1 command set.]) + ) + fi fi fi @@ -439,6 +461,7 @@ if test "x$enable_mmx" = xyes; then AC_SUBST(MMX_EXTRA_CFLAGS) AC_SUBST(SSE_EXTRA_CFLAGS) AC_SUBST(SSE2_EXTRA_CFLAGS) + AC_SUBST(SSE3_EXTRA_CFLAGS) AC_SUBST(SSE4_1_EXTRA_CFLAGS) AC_SUBST(F16C_EXTRA_CFLAGS) fi diff --git a/meson.build b/meson.build index 5795c96..4188019 100644 --- a/meson.build +++ b/meson.build @@ -163,6 +163,12 @@ if has_sse2 language: 'c') endif +has_sse3 = cc.has_argument('-msse3') and get_option('enable-sse3') +if has_sse3 + add_project_arguments( '-msse3', + language: 'c') +endif + has_sse41= cc.has_argument('-msse4.1') and get_option('enable-sse4_1') if has_sse41 add_project_arguments( '-msse4.1', diff --git a/meson_options.txt b/meson_options.txt index 51e5fdc..febd306 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,6 +1,7 @@ option('enable-mmx', type: 'boolean', value: true, description: 'enable MMX support') option('enable-sse', type: 'boolean', value: true, description: 'enable SSE support') option('enable-sse2', type: 'boolean', value: true, description: 'enable SSE2 support') +option('enable-sse3', type: 'boolean', value: true, description: 'enable SSE3 support') option('enable-sse4_1', type: 'boolean', value: true, description: 'enable SSE4.1 support') option('enable-f16c', type: 'boolean', value: true, description: 'enable hardware half-float support') option('with-docs', type: 'boolean', value: true) -- 2.30.2